Analysis of first run of the benchmarking experiment

Author

György Barabás

1 Loading and tidying the data

We first set up some functions to load and tidy the raw data:

library(tidyverse)
library(broom)
library(ggfortify)
library(jsonlite)
library(knitr)
library(mblm)



# Read the benchmark server list from the JSON config file and return it in
# tidy form, one row per (server, platform) combination with its IP address.
#
# configFile: path to the JSON configuration (default: ../config.json).
# Returns a tibble with columns: server, platform, ip.
serversFromConfig <- function(configFile = "../config.json") {
  fromJSON(configFile) |>
    as_tibble() |>
    # Keep only the download-server columns (named "*_dl_servers"):
    select(contains("dl")) |>
    # Label servers by row position; row_number() adapts to any number of
    # servers (the original hard-coded 1:3 and would mislabel or error if
    # the config ever listed a different count):
    mutate(server = str_c("Server ", row_number()), .before = 1) |>
    rename_with(\(x) str_remove(x, "_dl_servers"), !server) |>
    pivot_longer(!server, names_to = "platform", values_to = "ip") |>
    # Map the config's short platform codes to display names:
    mutate(platform = case_match(
      platform,
      "swarm" ~ "Swarm",
      "ipfs"  ~ "IPFS",
      "arw"   ~ "Arweave"
    ))
}


# Load the raw benchmark results from JSON and flatten the nested test
# records so each row is one individual download measurement.
#
# jsonFile: path to the results file (default: ../results.json).
# Returns a tibble with (among others) time_sec, replicate, and platform.
dataFromJsonRaw <- function(jsonFile = "../results.json") {
  raw <- jsonlite::fromJSON(jsonFile)
  raw |>
    as_tibble() |>
    unnest(tests) |>
    unnest(results) |>
    rename(
      time_sec  = download_time_seconds,
      replicate = ref,
      platform  = storage
    )
}


# Tidy the raw results table: coerce column types, harmonize platform
# names, and attach server labels (and coordinates) from the config file.
#
# rawTable: output of dataFromJsonRaw().
# Returns a tibble with platform, size_kb, server, time_sec, attempts,
# sha256_match, ip, latitude, longitude, and replicate columns.
dataFromJson <- function(rawTable) {
  rawTable |>
    # The JSON stores booleans as strings; convert to logical:
    mutate(sha256_match = sha256_match == "true") |>
    # Harmonize capitalization with the config file's naming:
    mutate(platform = ifelse(platform == "Ipfs", "IPFS", platform)) |>
    mutate(size_kb = as.integer(size)) |>
    # Drop replaced columns in one idiomatic tidyselect call (server is
    # re-derived via the join below):
    select(!c(size, server, timestamp)) |>
    left_join(serversFromConfig(), by = join_by(platform, ip)) |>
    relocate(size_kb, server, time_sec, attempts, sha256_match,
             .after = platform)
}

After loading and tidying the data, here’s what the first few rows of the table look like:

# Load and tidy the full data set:
dat <- dataFromJson(dataFromJsonRaw())

# Display the first ten rows of the tidied table:
dat |>
  slice_head(n = 10) |>
  kable()
platform size_kb server time_sec attempts sha256_match ip latitude longitude replicate
Swarm 1 Server 1 0.0000000 15 FALSE 5.9.50.180:8080 50.4779 12.3713 8390191395cd33a3c7f3a63824d484d6f5666766516068daffc81aa1ab583c27
Swarm 1 Server 2 0.1379130 1 TRUE 188.245.154.61:1633 49.4542 11.0775 8390191395cd33a3c7f3a63824d484d6f5666766516068daffc81aa1ab583c27
Swarm 1 Server 3 0.1654890 1 TRUE 188.245.177.151:1633 49.4542 11.0775 8390191395cd33a3c7f3a63824d484d6f5666766516068daffc81aa1ab583c27
Swarm 1 Server 1 0.0000000 15 FALSE 5.9.50.180:8080 50.4779 12.3713 6e0c819f68bbf512dbcb4a5d2d696e5347b8dafab7e97df7223db0ada69344d7
Swarm 1 Server 2 0.3394287 1 TRUE 188.245.154.61:1633 49.4542 11.0775 6e0c819f68bbf512dbcb4a5d2d696e5347b8dafab7e97df7223db0ada69344d7
Swarm 1 Server 3 0.1937695 1 TRUE 188.245.177.151:1633 49.4542 11.0775 6e0c819f68bbf512dbcb4a5d2d696e5347b8dafab7e97df7223db0ada69344d7
Swarm 1 Server 1 0.0000000 15 FALSE 5.9.50.180:8080 50.4779 12.3713 36e1e9345d559b6affece4568949f6ff2e6beb3b1db80ae5ebbcf3a74f0c5e56
Swarm 1 Server 2 0.3230913 1 TRUE 188.245.154.61:1633 49.4542 11.0775 36e1e9345d559b6affece4568949f6ff2e6beb3b1db80ae5ebbcf3a74f0c5e56
Swarm 1 Server 3 0.3515806 1 TRUE 188.245.177.151:1633 49.4542 11.0775 36e1e9345d559b6affece4568949f6ff2e6beb3b1db80ae5ebbcf3a74f0c5e56
Swarm 1 Server 1 0.0000000 15 FALSE 5.9.50.180:8080 50.4779 12.3713 7049fe5e08fd855c3b89788a317f51bf844c20ecf3c5f71f863d8a7c9ed2af0d

We can do some sanity checks. First of all, the experiment is well balanced, with 30 replicates per size, server, and platform:

# Count replicates per (size, server, platform), then tally how many
# combinations share the same replicate count:
replicateCounts <- dat |>
  count(size_kb, server, platform, name = "number of replicates")
replicateCounts |>
  count(`number of replicates`,
        name = "size-server-platform combinations") |>
  kable()
number of replicates size-server-platform combinations
30 45

And the replicates are also correctly assigned:

# Each replicate hash should occur exactly once per server:
serverReplicateCounts <- dat |>
  count(server, replicate, name = "number of replicates")
serverReplicateCounts |>
  count(`number of replicates`,
        name = "server-replicate combinations") |>
  kable()
number of replicates server-replicate combinations
1 1350

Let us check if any of the sha256 matches failed:

# Tabulate how many downloads passed / failed the sha256 integrity check:
dat |> count(sha256_match) |> kable()
sha256_match n
FALSE 150
TRUE 1200

Indeed, there are 150 failures. Let us check where those failed attempts are:

# Locate the failed downloads by platform, size, and server:
failedDownloads <- filter(dat, !sha256_match)
failedDownloads |>
  count(platform, size_kb, server) |>
  kable()
platform size_kb server n
Swarm 1 Server 1 30
Swarm 10 Server 1 30
Swarm 100 Server 1 30
Swarm 1000 Server 1 30
Swarm 10000 Server 1 30

In short, all Swarm downloads on Server 1 have failed, and nothing else.

Those same failed downloads also always had 15 download attempts. All other downloads succeeded in a single attempt:

# Cross-tabulate the number of download attempts per platform and server:
attemptCounts <- count(dat, platform, attempts, server)
attemptCounts |>
  pivot_wider(names_from = platform, values_from = attempts) |>
  relocate(Swarm, IPFS, Arweave, .after = n) |>
  kable()
server n Swarm IPFS Arweave
Server 1 150 15 1 1
Server 2 150 1 1 1
Server 3 150 1 1 1

So everything in the data looks OK at first glance, except for the (Swarm, Server 1) combination.

2 Preliminary analysis

Plotting the raw results, we get:

# Density of retrieval times (log-scaled x-axis) per platform, faceted by
# server (rows) and file size (columns); failed downloads are excluded:
dat |>
  filter(sha256_match) |>
  select(platform, size_kb, server, time_sec) |>
  # Order platforms by typical retrieval time so colors sort sensibly:
  mutate(platform = fct_reorder(platform, time_sec)) |>
  # Human-readable size labels, ordered by the underlying numeric size:
  mutate(size = case_when(
    size_kb ==     1 ~ "1 KB",
    size_kb ==    10 ~ "10 KB",
    size_kb ==   100 ~ "100 KB",
    size_kb ==  1000 ~ "1 MB",
    size_kb == 10000 ~ "10 MB"
  )) |>
  mutate(size = fct_reorder(size, size_kb)) |>
  ggplot(aes(x = time_sec, fill = platform, color = platform)) +
  geom_density(bw = 0.05, alpha = 0.2) +
  scale_x_log10() +
  scale_color_manual(values = c("steelblue", "goldenrod", "forestgreen")) +
  scale_fill_manual(values = c("steelblue", "goldenrod", "forestgreen")) +
  labs(x = "Retrieval time (seconds)", y = "Density",
       color = "Platform: ", fill = "Platform: ") +
  facet_grid(server ~ size, scales = "fixed") +
  theme_bw() +
  theme(legend.position = "bottom", panel.grid = element_blank())

Here we have retrieval times (on the log scale) along the x-axis and density of incidence along the y-axis. The curves are higher where there are more data. Colors represent the different storage platforms; facet rows are the different servers used, and facet columns are the various data sizes.

At a glance, we see that IPFS is the fastest. For small files, Swarm is faster than Arweave. For 10MB files, it is a bit slower but still comparable. Somewhat strangely, the Swarm distributions look bimodal, even on Server 2 and Server 3 where the downloads succeeded. This should probably be investigated further.

Now we check the relationship between file size and download times, for each unique platform-server combination (removing the faulty (Swarm, Server 1) data, of course):

# Prepare the data for platform-server faceting: fix the platform factor
# order, sort the rows, and build a combined "Platform, Server" factor
# (plat_serv) whose levels follow the sorted row order.
#
# dat: a tibble with platform, server, and size_kb columns.
# Returns the same tibble with platform releveled and plat_serv added.
mergePlatformServer <- function(dat) {
  dat |>
    # Make sure platform-server combinations can be properly sorted:
    mutate(platform = fct_relevel(platform, "Swarm", "IPFS", "Arweave")) |>
    arrange(platform, server, size_kb) |>
    # Merge platform-server combinations, for plotting purposes:
    mutate(plat_serv = as_factor(str_c(platform, ", ", server)))
}

# Scatter plot of y against x (log-scaled x-axis) with a fitted regression
# curve, faceted by platform-server combination (plat_serv column).
#
# dat:     data frame containing plat_serv plus the x and y columns.
# x, y:    unquoted column names (tidy-eval, embraced with {{ }}).
# formula: model formula handed to geom_smooth().
# method:  fitting function handed to geom_smooth() (default: lm).
# log_y:   if TRUE, log-scale the y-axis as well.
plotPlatformServerFit <- function(dat, x, y, formula = y ~ x, method = lm,
                                  log_y = FALSE) {
  # Pick the y-axis scale up front instead of inlining the conditional:
  yScale <- if (log_y) scale_y_log10() else scale_y_continuous()
  ggplot(dat, aes(x = {{x}}, y = {{y}})) +
    geom_point(color = "steelblue", alpha = 0.5) +
    geom_smooth(formula = formula, method = method,
                color = "goldenrod", fill = "goldenrod") +
    scale_x_log10() +
    yScale +
    labs(x = "File size (KB)", y = "Download time (seconds)") +
    facet_wrap(~ plat_serv, scales = "free_y") +
    theme_bw()
}

# Linear fit of download time vs log file size, per platform-server pair:
dat |>
  mergePlatformServer() |>
  # Failed downloads recorded a time of 0; mask them with NA instead:
  mutate(time_sec = ifelse(sha256_match, time_sec, NA)) |>
  plotPlatformServerFit(size_kb, time_sec)

The general trend is always to have longer download times for larger files, as expected. We can analyze this pattern further by performing a linear regression for each platform-server combination:

# Fit a separate linear regression (download time vs log10 file size) for
# every platform-server combination and collect tidy coefficient tables.
regressionDat <- dat |>
  filter(sha256_match) |>
  mutate(predictor = log10(size_kb), response = time_sec) |>
  nest(data = !platform & !server) |>
  # The lambda argument is named `d` (was `dat`) so it no longer shadows
  # the global data table:
  mutate(fit = map(data, \(d) lm(response ~ predictor, data = d))) |>
  mutate(regtab = map(fit, broom::tidy)) |>
  unnest(regtab)

Then we can inspect the regression statistics both for the intercepts:

# Regression statistics for the intercept terms:
interceptTab <- regressionDat |>
  select(!data & !fit) |>
  filter(term == "(Intercept)")
kable(interceptTab)
platform server term estimate std.error statistic p.value
Swarm Server 2 (Intercept) -1.4296923 0.3544905 -4.0330902 0.0000879
Swarm Server 3 (Intercept) -1.1277605 0.3039713 -3.7100892 0.0002925
IPFS Server 1 (Intercept) -0.0746538 0.0802125 -0.9307002 0.3535243
IPFS Server 2 (Intercept) 0.0628410 0.0260047 2.4165247 0.0168870
IPFS Server 3 (Intercept) 0.0100561 0.0083878 1.1988908 0.2324865
Arweave Server 1 (Intercept) 0.8667627 0.0654446 13.2442136 0.0000000
Arweave Server 2 (Intercept) 1.4500342 0.0252266 57.4802735 0.0000000
Arweave Server 3 (Intercept) 1.5916742 0.0902377 17.6386924 0.0000000

And the slopes:

# Regression statistics for the slope terms:
slopeTab <- regressionDat |>
  select(!data & !fit) |>
  filter(term != "(Intercept)") |>
  mutate(term = "slope")
kable(slopeTab)
platform server term estimate std.error statistic p.value
Swarm Server 2 slope 2.0558179 0.1447202 14.205470 0
Swarm Server 3 slope 1.7955934 0.1240958 14.469418 0
IPFS Server 1 slope 0.4546123 0.0327466 13.882729 0
IPFS Server 2 slope 0.0971226 0.0106164 9.148381 0
IPFS Server 3 slope 0.0837132 0.0034243 24.446783 0
Arweave Server 1 slope 0.4335902 0.0267177 16.228598 0
Arweave Server 2 slope 0.2494251 0.0102987 24.219006 0
Arweave Server 3 slope 0.4123861 0.0368394 11.194169 0

As seen, the model thinks that there is no way the positive slopes are due to just chance. The same is true for most intercepts except two (maybe three) of them—all of which are for IPFS. This makes intuitive sense, because IPFS data are stored in a way that the general overhead of downloading might indeed be the smallest.

The above is contingent on the assumptions of the linear regression model being fulfilled. To check whether that is so, let us make diagnostic plots:

# Diagnostic plots (residuals vs fitted, Q-Q, scale-location, leverage)
# for each fitted lm, drawn via ggfortify's autoplot().
regressionDat |>
  # Keep one row (hence one fit object) per model:
  filter(term != "(Intercept)") |>
  mutate(platform = fct_relevel(platform, "Swarm", "IPFS", "Arweave")) |>
  arrange(platform, server) |>
  mutate(diagnostics = map(fit, \(x) {
    autoplot(x, smooth.colour = NA, alpha = 0.3, colour = "steelblue") +
      theme_bw()
  } )) |>
  # Arrange the four diagnostic panels of each fit in one grid, titled
  # with the platform-server combination; autoplot() on an lm returns an
  # S4 object whose @plots slot holds the individual ggplots:
  mutate(diagnostics = pmap(list(diagnostics, platform, server), \(dia, sto, se) {
    gridExtra::grid.arrange(grobs = dia@plots, top = str_c(sto, ", ", se))
  } )) |>
  # Draw the plots as a side effect but keep the knitted output clean —
  # suppress messages and discard the printed tibble:
  suppressMessages() |>
  capture.output() |>
  invisible()

Most of these diagnostics do not look good. For Swarm, there is a clear relationship between the residuals and fitted values. (This is not surprising, given the manifestly nonlinear relationship that we are capturing using the linear model.) The quantile-quantile plot also looks ugly for Server 2 (for Server 3, it looks decent). IPFS suffers from the same problems of non-independent residuals and bad Q-Q distributions, for all servers. So does Arweave, on Server 1, although the diagnostics for Server 2 look very good, and acceptable for Server 3 (where the only problem is the presence of a few extreme outliers).

Given these problems with nonnormality of the residuals, it might be better to also perform a non-parametric (Theil–Sen) regression. Here are the fitted lines:

# Theil-Sen (median-based) fits instead of least squares; the wrapper
# accepts (and discards) the weights argument geom_smooth passes along:
dat |>
  mergePlatformServer() |>
  mutate(time_sec = ifelse(sha256_match, time_sec, NA)) |>
  plotPlatformServerFit(
    size_kb, time_sec,
    method = \(formula, data, weights) mblm(formula, data)
  )

These are qualitatively the same as before, although some of the slopes are smaller because Theil–Sen regression is robust to outliers and effectively discounts their influence. Re-generating the regression tables:

# Theil-Sen regressions (one per platform-server combination), mirroring
# regressionDat but with the non-parametric mblm estimator.
theilSenDat <- dat |>
  filter(sha256_match) |>
  mutate(predictor = log10(size_kb), response = time_sec) |>
  nest(data = !platform & !server) |>
  # The lambda argument is named `d` (was `dat`) so it no longer shadows
  # the global data table:
  mutate(fit = map(data, \(d) mblm(response ~ predictor, d))) |>
  mutate(regtab = map(fit, broom::tidy)) |>
  unnest(regtab)

# Theil-Sen intercept estimates:
tsInterceptTab <- theilSenDat |>
  select(!data & !fit) |>
  filter(term == "(Intercept)")
kable(tsInterceptTab)
platform server term estimate std.error statistic p.value
Swarm Server 2 (Intercept) -0.2107474 0.5248384 2642 0.0000000
Swarm Server 3 (Intercept) -0.3736978 0.7753667 2107 0.0000000
IPFS Server 1 (Intercept) 0.0291476 0.1420760 6445 0.1423164
IPFS Server 2 (Intercept) 0.0006500 0.0665083 5475 0.7256958
IPFS Server 3 (Intercept) 0.0018331 0.0342697 5629 0.9506298
Arweave Server 1 (Intercept) 0.9266490 0.1818643 11243 0.0000000
Arweave Server 2 (Intercept) 1.3528714 0.1397821 11325 0.0000000
Arweave Server 3 (Intercept) 1.5311779 0.2194461 11316 0.0000000
# Theil-Sen slope estimates:
tsSlopeTab <- theilSenDat |>
  select(!data & !fit) |>
  filter(term != "(Intercept)") |>
  mutate(term = "slope")
kable(tsSlopeTab)
platform server term estimate std.error statistic p.value
Swarm Server 2 slope 0.5626411 0.3953407 11303 0
Swarm Server 3 slope 0.6538622 0.5710144 11217 0
IPFS Server 1 slope 0.1859053 0.0838380 11178 0
IPFS Server 2 slope 0.0670717 0.0342535 10899 0
IPFS Server 3 slope 0.0632168 0.0173881 11325 0
Arweave Server 1 slope 0.2592064 0.0860815 11325 0
Arweave Server 2 slope 0.2748845 0.0575947 11325 0
Arweave Server 3 slope 0.3487619 0.0856913 11285 0

Qualitatively, everything is the same as before.

3 Building a predictive model

The previous models convincingly establish a relationship between log file size and download times that is not simply due to chance. But they are not good models for prediction, because the relationships are manifestly nonlinear, yet the fitted curve was a linear function.

One improvement that can be done is to put the download times on the log scale as well. This produces visibly more linear relationships, revealing that the “true” dependence of download times on size may be described well by a power law:

# Log-log plot with a linear fit (i.e., a power-law model):
dat |>
  mergePlatformServer() |>
  mutate(time_sec = ifelse(sha256_match, time_sec, NA)) |>
  plotPlatformServerFit(size_kb, time_sec, log_y = TRUE)

But other models appear to do just as well if not better. Let us try a quadratic model:

# Quadratic model in log10(size), on the log-log scale:
dat |>
  mergePlatformServer() |>
  mutate(time_sec = ifelse(sha256_match, time_sec, NA)) |>
  plotPlatformServerFit(size_kb, time_sec, log_y = TRUE,
                        formula = y ~ I(x^2))

And a cubic one:

# Cubic model in log10(size), on the log-log scale:
dat |>
  mergePlatformServer() |>
  mutate(time_sec = ifelse(sha256_match, time_sec, NA)) |>
  plotPlatformServerFit(size_kb, time_sec, log_y = TRUE,
                        formula = y ~ I(x^3))

And an exponential:

# Exponential model in log10(size), on the log-log scale:
dat |>
  mergePlatformServer() |>
  mutate(time_sec = ifelse(sha256_match, time_sec, NA)) |>
  plotPlatformServerFit(size_kb, time_sec, log_y = TRUE,
                        formula = y ~ exp(x))

Let us compare these models. We will fit them all and extract relevant regression statistics, then compare AIC and BIC scores to perform model selection:

# Fit four candidate models (linear, quadratic, cubic, exponential in
# log10 size) of log10 download time, per platform-server combination,
# and attach tidy coefficients (regression) and fit statistics (quality).
modelComparison <- dat |>
  # Reuse the helper that relevels platform, sorts, and builds plat_serv
  # (this replaces three lines that duplicated its body verbatim):
  mergePlatformServer() |>
  filter(sha256_match) |>
  mutate(x = log10(size_kb), y = log10(time_sec)) |>
  select(plat_serv, x, y) |>
  crossing(formula = list("linear"      = formula(y ~ x),
                          "quadratic"   = formula(y ~ I(x^2)),
                          "cubic"       = formula(y ~ I(x^3)),
                          "exponential" = formula(y ~ exp(x)))) |>
  mutate(model = names(formula)) |>
  nest(data = x | y) |>
  mutate(fit = map2(formula, data, lm)) |>
  mutate(regression = map(fit, tidy),
         quality = map(fit, glance))

It is instructive to look at the diagnostic plots, which now look much better in almost all cases than before:

# Diagnostic plots for every candidate model of every platform-server
# combination, ordered by model complexity.
modelComparison |>
  mutate(model = fct_relevel(model, "linear", "quadratic",
                             "cubic", "exponential")) |>
  arrange(model) |>
  mutate(diagnostics = map(fit, \(x) {
    autoplot(x, smooth.colour = NA, alpha = 0.3, colour = "steelblue") +
      theme_bw()
  } )) |>
  # Arrange each fit's four diagnostic panels in a grid, titled with the
  # platform-server combination and the model name; @plots is the slot of
  # the S4 object autoplot() returns for lm fits:
  mutate(diagnostics = pmap(list(diagnostics, plat_serv, model), \(dia, ps, m) {
    gridExtra::grid.arrange(grobs = dia@plots, top = str_c(ps, ", ", m))
  } )) |>
  # Draw the plots as a side effect; suppress messages and discard the
  # printed tibble so the knitted output stays clean:
  suppressMessages() |>
  capture.output() |>
  invisible()

We can now look at AIC and BIC scores. Here is a table with AIC first:

# AIC scores, one column per candidate model:
aicTab <- modelComparison |>
  unnest(quality) |>
  select(plat_serv, model, AIC) |>
  pivot_wider(names_from = model, values_from = AIC) |>
  rename(`platform, server` = plat_serv)
kable(aicTab)
platform, server linear quadratic cubic exponential
Swarm, Server 2 74.65295 -24.45972 -25.32414 -18.30025
Swarm, Server 3 99.41558 51.70865 53.57531 66.33704
IPFS, Server 1 -26.73906 -108.95065 -106.96873 -98.67160
IPFS, Server 2 -17.85868 -135.50458 -145.74801 -100.76498
IPFS, Server 3 -293.78867 -410.59626 -422.68609 -240.91414
Arweave, Server 1 -348.44062 -474.17289 -488.16129 -487.35987
Arweave, Server 2 -536.61144 -569.06277 -583.28061 -508.01725
Arweave, Server 3 -304.82322 -317.63970 -318.17512 -302.36202

And then BIC:

# BIC scores, one column per candidate model:
bicTab <- modelComparison |>
  unnest(quality) |>
  select(plat_serv, model, BIC) |>
  pivot_wider(names_from = model, values_from = BIC) |>
  rename(`platform, server` = plat_serv)
kable(bicTab)
platform, server linear quadratic cubic exponential
Swarm, Server 2 83.684861 -12.41717 -10.27097 -9.268347
Swarm, Server 3 108.447484 63.75119 68.62849 75.368945
IPFS, Server 1 -17.707158 -96.90811 -91.91555 -89.639699
IPFS, Server 2 -8.826774 -123.46204 -130.69484 -91.733077
IPFS, Server 3 -284.756767 -398.55372 -407.63291 -231.882236
Arweave, Server 1 -339.408711 -462.13035 -473.10811 -478.327965
Arweave, Server 2 -527.579530 -557.02023 -568.22743 -498.985347
Arweave, Server 3 -295.791316 -305.59716 -303.12195 -293.330113

Finding the best models:

# Best model per platform-server combination according to AIC:
bestByAIC <- modelComparison |>
  unnest(quality) |>
  select(plat_serv, model, AIC) |>
  filter(AIC == min(AIC), .by = plat_serv)
bestByAIC |>
  rename(`platform, server` = plat_serv) |>
  kable()
platform, server model AIC
Swarm, Server 2 cubic -25.32414
Swarm, Server 3 quadratic 51.70865
IPFS, Server 1 quadratic -108.95065
IPFS, Server 2 cubic -145.74801
IPFS, Server 3 cubic -422.68609
Arweave, Server 1 cubic -488.16129
Arweave, Server 2 cubic -583.28061
Arweave, Server 3 cubic -318.17512
# Best model per platform-server combination according to BIC:
bestByBIC <- modelComparison |>
  unnest(quality) |>
  select(plat_serv, model, BIC) |>
  filter(BIC == min(BIC), .by = plat_serv)
bestByBIC |>
  rename(`platform, server` = plat_serv) |>
  kable()
platform, server model BIC
Swarm, Server 2 quadratic -12.41717
Swarm, Server 3 quadratic 63.75119
IPFS, Server 1 quadratic -96.90811
IPFS, Server 2 cubic -130.69484
IPFS, Server 3 cubic -407.63291
Arweave, Server 1 exponential -478.32796
Arweave, Server 2 cubic -568.22743
Arweave, Server 3 quadratic -305.59716

Both the AIC and the more conservative BIC prefer the quadratic and cubic models, to about the same degree. For simplicity, I will use the quadratic model here.

Let us check the regression results for the quadratic model:

# Coefficient table for the quadratic model only:
quadTab <- modelComparison |>
  filter(model == "quadratic") |>
  unnest(regression) |>
  select(plat_serv, term, estimate, std.error, statistic, p.value)
kable(quadTab)
plat_serv term estimate std.error statistic p.value
Swarm, Server 2 (Intercept) -0.5333916 0.0264855 -20.138968 0
Swarm, Server 2 I(x^2) 0.0916372 0.0031477 29.112495 0
Swarm, Server 3 (Intercept) -0.5678050 0.0329265 -17.244608 0
Swarm, Server 3 I(x^2) 0.0918986 0.0039132 23.484397 0
IPFS, Server 1 (Intercept) -0.6221900 0.0199304 -31.218204 0
IPFS, Server 1 I(x^2) 0.0590490 0.0023686 24.929531 0
IPFS, Server 2 (Intercept) -0.9944977 0.0206892 -48.068529 0
IPFS, Server 2 I(x^2) 0.0457231 0.0024588 18.595551 0
IPFS, Server 3 (Intercept) -1.1639666 0.0073854 -157.603346 0
IPFS, Server 3 I(x^2) 0.0504575 0.0008777 57.486682 0
Arweave, Server 1 (Intercept) 0.0566884 0.0059488 9.529338 0
Arweave, Server 1 I(x^2) 0.0250531 0.0007070 35.436060 0
Arweave, Server 2 (Intercept) 0.2004186 0.0041812 47.932773 0
Arweave, Server 2 I(x^2) 0.0134367 0.0004969 27.039724 0
Arweave, Server 3 (Intercept) 0.2575318 0.0095876 26.860983 0
Arweave, Server 3 I(x^2) 0.0173633 0.0011394 15.238440 0

The good news: the parameter values appear consistent across platforms. We could use them to formulate a prediction about the performance of these platforms, as a function of file size. We can compute the average parameter estimates across servers and use that average as the representative fit for each platform:

# Average the quadratic-model parameters across servers to obtain one
# representative (a, b) pair per platform, for the model
# log10(t) = a + b * log10(s)^2.
paramTab <- modelComparison |>
  filter(model == "quadratic") |>
  unnest(regression) |>
  # Split "Platform, Server" back into separate columns:
  separate(plat_serv, into = c("platform", "server"), sep = ", ") |>
  # Average each coefficient across servers; summarize() keeps only the
  # grouping columns and estimate, so the previous select() of std.error,
  # statistic, and p.value was dead code and is dropped here:
  summarize(estimate = mean(estimate), .by = c(platform, term)) |>
  # a = intercept, b = coefficient of log10(s)^2:
  mutate(term = ifelse(term == "(Intercept)", "a", "b")) |>
  pivot_wider(names_from = term, values_from = estimate)

# Display the per-platform parameter table:
kable(paramTab)
platform a b
Swarm -0.5505983 0.0917679
IPFS -0.9268848 0.0517432
Arweave 0.1715463 0.0186177

The general relationship is then \[ \log_{10}(t) = a + b \log_{10}^2(s) , \tag{1}\] where \(t\) is download time, \(s\) is file size, and \(a\) and \(b\) are the fitted parameters summarized in the table above. So we have the following models:

Swarm \(\log_{10}(t) = -0.551 + 0.0918 \log_{10}^2(s)\)
IPFS \(\log_{10}(t) = -0.927 + 0.0517 \log_{10}^2(s)\)
Arweave \(\log_{10}(t) = +0.172 + 0.0186 \log_{10}^2(s)\)

This predicts that for small file sizes, IPFS is best. Looking at the parameters \(b\), it is also clear that Swarm will never catch up, but Arweave will. In turn, Swarm starts out faster than Arweave (smaller \(a\)) but is eventually overtaken by it (larger \(b\) for Swarm). We can plot the three curves:

# Plot the predicted download-time curves of Equation (1) for each platform,
# over file sizes from 1 KB (10^0) to 10 GB (10^7).
paramTab |>
  mutate(platform = as_factor(platform)) |>
  mutate(curve = map2(a, b, \(a, b) {
    # Evaluate the fitted quadratic on a grid of log10 file sizes; spell
    # out length.out instead of relying on partial matching of `l`:
    tibble(log_s = seq(0, 7, length.out = 101)) |>
      mutate(log_t = a + b * log_s^2)
  } )) |>
  unnest(curve) |>
  # Back-transform to natural units for plotting:
  mutate(s = 10^log_s, t = 10^log_t) |>
  ggplot(aes(x = s, y = t, color = platform)) +
  geom_line(linewidth = 1) +
  labs(x = "File size", y = "Predicted download time", color = NULL) +
  scale_x_log10(breaks = 10^c(0, 3, 6), labels = c("1KB", "1MB", "1GB")) +
  scale_y_log10(breaks = c(1, 60, 3600), labels = c("1s", "1m", "1h")) +
  scale_color_manual(values = c("steelblue", "goldenrod", "forestgreen")) +
  theme_bw()